# Load the dataset we use in the analysis, restrict it to fields *not* from ZTRAX, and save it as a public-facing dataset for the replication package

pacman::p_load(tidyverse, qs, sf)

source("code/globals.R")

# Build the public-facing damage dataset: keep only rows flagged as being in
# the damage data, and keep only the fields listed below (per the file header,
# these are the fields that do *not* come from ZTRAX).
#
# Note that for this public-facing dataset, we report our "destroyed" variable
# from the paper as "damaged" to limit user confusion, since this variable is
# a binary indicator of whether a home was damaged or not. In practice, as we
# discuss in the paper, most homes that are damaged to any degree by fire must
# be significantly or completely rebuilt, so the distinction between "damaged"
# and "destroyed" is not particularly meaningful.
data <- qread(file.path(WORKING, "analysisdataset.qs")) %>%
  filter(in_damage_data == 1) %>%
  select(
    FIPS, APN_orig, address_orig, lon_orig, lat_orig,
    incidentid, incidentname, incidentstartdate,
    damaged = destroyed, sample_main
  )

# Apply same restriction we use for regressions, then drop the flag itself
# since it is constant in the exported sample
regs <- data %>%
  filter(sample_main) %>%
  select(-sample_main)

# Make sure the output folder exists before writing: write_csv() does not
# create missing directories and would otherwise fail on a fresh checkout.
# dir.create() is a silent no-op when the folder is already there.
public_dir <- "damage-data-public"
dir.create(public_dir, showWarnings = FALSE, recursive = TRUE)

write_csv(regs, file.path(public_dir, "damaged-homes-public.csv"))

# Incidents that appear in the public damage data; used to subset perimeters
incs_in_regs <- unique(regs$incidentid)

# Read the cleaned perimeters (presumably an sf object, since it is passed to
# write_sf() below -- TODO confirm), rename fields to match the damage data,
# and keep only the perimeters whose incident is present in the damage data
perims <- qread(file.path(WORKING, "cleanedperimeters.qs")) %>%
  rename(
    incidentid = UNIT_AND_NUMBER,
    perim_name = FireNamePerim,
    perim_year = yearperim,
    perim_source = datasource
  ) %>%
  filter(incidentid %in% incs_in_regs)

# Save the retained perimeters alongside the public damage data
write_sf(perims, "damage-data-public/damaged-homes-perimeters.gpkg")
